#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <string>

// Set the logger level to LL_WARN before including the headers below, to silence log output you do not need to care about.
#define LOGGER_LEVEL LL_WARN
#include "Configuration.h"
#include "Simhasher.h"
#include "SplitToolCppJieba.h"
#include "PageLibPreprocessor.h"


/**
 * Entry point of the offline page-library preprocessing tool.
 *
 * Invokes the PageLibPreprocessor steps in a fixed order: read the page
 * library, process it (reported to the user as hash computation), cut
 * redundant pages, build the inverted index table, and store the results.
 * A progress message is printed to stdout before each step.
 *
 * Usage: <program> [data_dir]
 *
 * @param argc Number of command-line arguments.
 * @param argv argv[1], when present and non-empty, is the directory that
 *             contains newripepage.dat and receives newripepage2.dat,
 *             newoffset.dat and invertIndex.dat. When omitted, the original
 *             location /home/chizuru/search-engines/data is used.
 * @return EXIT_SUCCESS once every step has been invoked.
 */
int main(int argc, char *argv[]) {
    // The data directory used to be hard-coded to one developer's home
    // directory; it is still the default so existing invocations are unchanged.
    std::string dataDir = "/home/chizuru/search-engines/data";
    if (argc > 1 && argv[1][0] != '\0') {
        dataDir = argv[1];
    }

    const std::string pageLibPath = dataDir + "/newripepage.dat";
    const std::string newPageLibPath = dataDir + "/newripepage2.dat";
    const std::string offsetPath = dataDir + "/newoffset.dat";
    const std::string invertIndexPath = dataDir + "/invertIndex.dat";

    std::printf("加载配置文件\n");
    auto &config = Configuration::getInstance();
    Simhasher simhasher(config);
    SplitToolCppJieba splitToolCppJieba(config);
    PageLibPreprocessor pageLibPreprocessor(&simhasher, &splitToolCppJieba);

    std::printf("读取网页库\n");
    // c_str() keeps the argument a C string, exactly like the original literal.
    pageLibPreprocessor.readInfoFromFile(pageLibPath.c_str());

    std::printf("计算hash\n");
    pageLibPreprocessor.doProcess();

    std::printf("去重中...\n");
    pageLibPreprocessor.cutRedundantPages();

    std::printf("计算倒排索引\n");
    pageLibPreprocessor.buildInvertIndexTable();

    std::printf("正在保存文件\n");
    pageLibPreprocessor.storeOnDisk(newPageLibPath.c_str(),
                                    offsetPath.c_str(),
                                    invertIndexPath.c_str());

    return EXIT_SUCCESS;
}
